Attaching package: 'janitor'
The following objects are masked from 'package:stats':
chisq.test, fisher.test
library(lubridate)
Attaching package: 'lubridate'
The following objects are masked from 'package:base':
date, intersect, setdiff, union
library(broom)library(tsibble)
Attaching package: 'tsibble'
The following object is masked from 'package:lubridate':
interval
The following objects are masked from 'package:base':
intersect, setdiff, union
Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
library(ggcorrplot)library(car)
Loading required package: carData
Attaching package: 'car'
The following object is masked from 'package:purrr':
some
The following object is masked from 'package:dplyr':
recode
library(modelr)
Attaching package: 'modelr'
The following object is masked from 'package:broom':
bootstrap
Loading in the Data & Data Wrangling
# setting my root directoryrootdir <- ("/Users/colleenmccamy/Documents/MEDS/EDS_222_Stats/final_project")# reading in the dataeia_data_raw <-read_csv(paste0(rootdir, "/data/eia_data.csv"))
Warning: One or more parsing issues, see `problems()` for details
Rows: 35436 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): region, region_name, system_operator, stystem_operator_name
dbl (1): hourly_energy_mwh
dttm (1): date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# cleaning the data to be the two variables of interesteia_df <- eia_data_raw |>select(date, hourly_energy_mwh) |>na.omit()# creating a time series dataframeeia_ts <- eia_df |>as_tsibble()
Using `date` as index variable.
Adding Temperature Data
# loading in the temperature datatemp_data <-read_csv(paste0(rootdir, "/data/sd_temp_data.csv"))
Rows: 1522 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): Date, temp_max, temp_avg, temp_dept
dbl (1): temp_min
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
Merging the Data
# restructuring the eia data to merge the dataset with the temperature data by dateeia_data <- eia_df |>mutate(time = (date)) |>mutate(date =as.Date(date))eia_data$time <-format(eia_data$time, format ="%H:%M:%S")# merging the data into one dataframeenergy_temp_df <-left_join(x = eia_data,y = temp_data,by ="date")
Grouping Data by Day
# creating dataframe for tou peak horustou_peak_hours_df <- energy_temp_df |>filter(time >=16& time <=21)# grouping it for daily peak hours to plot with daily maximum temperaturedaily_peak_hrs_df <- tou_peak_hours_df |>group_by(date) |>summarize(daily_energy_mwh =sum(hourly_energy_mwh))
Basic Visualization
# exploring the data by plotting energy demand throughout timeenergy_demand_plot <-ggplot(data = eia_df,aes(x = date, y = hourly_energy_mwh)) +geom_line(col ="#b52b8c") +labs(title ="Hourly Energy Demand (MWh)",x ="Date",y ="MWh") +theme_minimal() +theme(plot.title =element_text(hjust =0.5))# plotting daily peak energy demand with daily max temperaturespeak_demand_plot <-ggplot(data = daily_peak_hrs_df,aes(x = date, y = daily_energy_mwh)) +geom_line(col ="#b52b8c") +labs(title ="Hourly Energy Demand (MWh)",x ="Date",y ="MWh") +theme_minimal() +theme(plot.title =element_text(hjust =0.5))# exploring the data by plotting maximum temperature throughout timemax_temp_plot <-ggplot(temp_data, aes(x = date, y = temp_max)) +geom_line(col ="#52796f") +labs(title ="Maximum Temperature per day (°F)",x ="Date",y ="Max Temperature (°F)") +theme_minimal() +theme(plot.title =element_text(hjust =0.5))# plotting along with daily temperatureggarrange(peak_demand_plot, max_temp_plot,ncol =2, nrow =1)
Visualizing Daily Demand for Peak Hours
# peak_demand_plot <- ggplot(data = daily_peak_hrs_df,# aes(x = date, # y = daily_energy_mwh)) +# geom_line(col = "#b52b8c") +# labs(title = "Hourly Energy Demand (MWh)",# x = "Date",# y = "MWh") +# theme_minimal() +# theme(plot.title = element_text(hjust = 0.5))# peak_demand_plot# # View(test)# # plotting along with daily temperature# ggarrange(peak_demand_plot, max_temp_plot,# ncol = 2, nrow = 1)
Determining a “Hot Day”
### ---- Determining a "Hot Day" ---- # determining the mean and standard deviation for the time period of interestmean_max_temp <-mean(energy_temp_df$temp_max, na.rm =TRUE)sd_max_temp <-sd(energy_temp_df$temp_max, na.rm =TRUE)print(mean_max_temp)
[1] 72.02147
print(sd_max_temp)
[1] 6.934419
# preparing the data to plotbox_data <-as_tibble(energy_temp_df$temp_max)# plotting the mean and standard deviationtemp_box <-ggplot(box_data) +geom_boxplot(aes(x = value)) +labs(x ="Maximum Daily Temperature (°F)") +theme_minimal()temp_box
# conducting the modelmodel_int_tou_peak_demand <-lm(formula = hourly_energy_mwh ~ tou_policy + peak_hours + hot_day + peak_hours * tou_policy,data = energy_temp_df)# getting the output of the modelsummary(model_int_tou_peak_demand)
Call:
lm(formula = hourly_energy_mwh ~ tou_policy + peak_hours + hot_day +
peak_hours * tou_policy, data = energy_temp_df)
Residuals:
Min 1Q Median 3Q Max
-1487.56 -292.30 -51.44 279.56 2028.45
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2154.551 3.462 622.296 <2e-16 ***
tou_policy -83.108 5.331 -15.590 <2e-16 ***
peak_hours -322.250 6.583 -48.955 <2e-16 ***
hot_day 409.116 6.438 63.552 <2e-16 ***
tou_policy:peak_hours -98.959 10.633 -9.307 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 424.6 on 36001 degrees of freedom
(48 observations deleted due to missingness)
Multiple R-squared: 0.2147, Adjusted R-squared: 0.2146
F-statistic: 2461 on 4 and 36001 DF, p-value: < 2.2e-16
# adding output to a tabletab_model(model_int_tou_peak_demand,pred.labels =c("Intercept", "TOU Policy In Effect", "During Peak Hours", "Max. Temp above 80 (°F)","TOU Policy & Peak Hours"),dv.labels =c("Hourly Electricity Demand (MWh)"),string.ci ="Conf. Int (95%)",string.p ="P-value",title ="Table 2. Linear Model Results for Predictors on Hourly Electricity Demand with an Interaction Addition",digits =2)
Table 2. Linear Model Results for Predictors on Hourly Electricity Demand with an Interaction Addition
Hourly Electricity Demand (MWh)
Predictors
Estimates
Conf. Int (95%)
P-value
Intercept
2154.55
2147.77 – 2161.34
<0.001
TOU Policy In Effect
-83.11
-93.56 – -72.66
<0.001
During Peak Hours
-322.25
-335.15 – -309.35
<0.001
Max. Temp above 80 (°F)
409.12
396.50 – 421.73
<0.001
TOU Policy & Peak Hours
-98.96
-119.80 – -78.12
<0.001
Observations
36006
R2 / R2 adjusted
0.215 / 0.215
Classical Decomposition
x =seq(from =ymd('2018-07-1'), length.out =1481,by='day')# preparing the dataframe for the time seriesdecom_df <- energy_temp_df |>group_by(date) |>summarize(daily_energy_mwh =sum(hourly_energy_mwh)) |>mutate(index = x)decom_ts <-as_tsibble(decom_df, index = index)# conducting the classical decomposition and plotting it - season is 365 daysdecom_plot_annual <-model(decom_ts, classical_decomposition(daily_energy_mwh ~season(365), type ="additive")) |>components() |>autoplot(col ="#3d405b") +theme_minimal() +labs(title ="Classical Decomposition Model",subtitle ="Seasonality defined as 365 days",x ="Date",caption ="Figure 4")# conducting the classical decomposition and plotting it - season is 30 days decom_plot_monthly <-model(decom_ts, classical_decomposition(daily_energy_mwh ~season(30), type ="additive")) |>components() |>autoplot(col ="#3d405b") +theme_minimal() +labs(title ="Classical Decomposition Model",subtitle ="Seasonality defined as 30 days",x ="Date",caption ="Figure 3")# stacking the two plotsggarrange(decom_plot_monthly, decom_plot_annual,ncol =1, nrow =2)